# Import and clean socio-economic data: neighbor_ses holds the SES indicators
# (everything except rent) for each sub-borough area and year.
# NOTE(review): the workbook name is spelled "neighorhood" - confirm it matches
# the file on disk.
neighbor_ses = readxl::read_excel(
  "data/neighorhood_indicators.xlsx",
  sheet = "Data"
) |>
  janitor::clean_names() |>
  # keep sub-borough rows whose year is one of the single years 2017-2022
  filter(
    region_type == "Sub-Borough Area",
    year %in% 2017:2022
  ) |>
  # region_name is exposed as `neighborhood`; all other columns are dropped
  select(
    neighborhood = region_name,
    year,
    pop_num,
    hh_inc_med_adj,
    pop16_unemp_pct,
    pop_edu_collp_pct,
    pop_edu_nohs_pct,
    pop_pov_pct,
    pop_race_asian_pct,
    pop_race_black_pct,
    pop_race_hisp_pct,
    pop_race_white_pct,
    pop_foreign_pct
  )
# neighbor_rent holds the two gross-rent columns for each sub-borough area,
# taken from the rows whose year value is the "2017-2021" range.
neighbor_rent = readxl::read_excel(
  "data/neighorhood_indicators.xlsx",
  sheet = "Data"
) |>
  janitor::clean_names() |>
  filter(
    region_type == "Sub-Borough Area",
    year == "2017-2021"
  ) |>
  # region_name is exposed as `neighborhood` so it can serve as a join key
  select(
    neighborhood = region_name,
    gross_rent_0_1beds,
    gross_rent_2_3beds
  )
# ses_df has the complaint count, the crime rate per 100,000 residents, the SES
# indicators and the rent columns for each neighborhood and year.
ses_df = nypd_ses_df |>
  group_by(year, borough, neighborhood) |>
  # one row per year / borough / neighborhood; drop the grouping so the
  # following steps work on a plain, ungrouped table
  summarise(crime_num = n(), .groups = "drop") |>
  # base merge() (inner join) is kept on purpose: it tolerates key columns of
  # different types, which a dplyr join would reject
  merge(neighbor_ses, by = c("year", "neighborhood")) |>
  # the multiplier must be written without a thousands separator: `100,000`
  # is parsed as `* 100` followed by a second, unnamed mutate() argument `000`
  mutate(crime_rate = (crime_num / pop_num) * 100000) |>
  left_join(neighbor_rent, by = "neighborhood")
# Heatmap
# NYC neighborhood borders, read from the NTA GeoJSON file; the area, length
# and NTA-code columns are dropped.
nyc = here::here("data", "NTA_map.geojson") |>
  read_sf() |>
  select(-c(shape_area, shape_leng, ntacode))
# Point layer of complaints: build sf points from the longitude / latitude
# columns and keep only the rows whose law category is "VIOLATION".
nypd_ses_df_heat = nypd_ses_df |>
  st_as_sf(
    coords = c("longitude", "latitude"), # columns used as x / y coordinates
    remove = FALSE,                      # keep the coordinate columns too
    crs = 4326                           # coordinate reference system code
  ) |>
  filter(law_cat_cd == "VIOLATION") |>
  select(cmplnt_num, law_cat_cd, latitude, longitude)
# base graphics: draw the neighborhood outlines, then add the complaint points
# in blue on top of the same plot
plot(select(nyc, geometry))
plot(select(nypd_ses_df_heat, geometry), col = "blue", add = TRUE)

# ggplot: neighborhood polygons first, complaint points drawn over them
ggplot() +
  geom_sf(data = nyc) +
  geom_sf(data = nypd_ses_df_heat) +
  coord_sf()

# spatial join: attach the borough and NTA name to every complaint point.
# NOTE(review): the object is called "felony", but nypd_ses_df_heat is filtered
# to law_cat_cd == "VIOLATION" - confirm which category is intended.
felony_in_neighborhoods = st_join(
  # left side: complaint id and point geometry only
  select(nypd_ses_df_heat, cmplnt_num, geometry),
  # right side: only the name columns needed from the neighborhood polygons
  select(nyc, boroname, ntaname, geometry),
  # match a point to a polygon whenever the two geometries intersect
  join = st_intersects,
  # inner join: points that fall in no neighborhood polygon are dropped
  left = FALSE
)
# Number of joined complaints per NTA, attached back onto the polygons.
count_by_neighborhood = felony_in_neighborhoods |>
  # plain table without geometry, so counting is cheap
  st_drop_geometry() |>
  count(ntaname) |>
  # right join keeps every row of nyc; NTAs with no counted complaints get
  # n = NA
  right_join(nyc, by = "ntaname") |>
  # the join result is a plain data frame again; restore the sf class
  st_as_sf() |>
  select(ntaname, n, boroname, geometry)
# choropleth: fill each neighborhood polygon by its complaint count n
ggplot(data = count_by_neighborhood) +
  geom_sf(mapping = aes(fill = n)) +
  coord_sf() +
  theme_bw()

# tmap
# 1) complaint points on their own
tm_shape(nypd_ses_df_heat) + tm_dots()

# 2) neighborhood polygons with the complaint points layered on top
tm_shape(nyc) + tm_polygons() +
  tm_shape(nypd_ses_df_heat) + tm_dots()

# 3) neighborhood polygons styled by the complaint count column "n"
tm_shape(count_by_neighborhood) + tm_polygons(col = "n")

# leaflet: interactive map with the default tile layer and one circle per
# complaint point
leaflet(elementId = "nypd_ses_df_heat") |>
  addTiles() |>
  addCircles(data = nypd_ses_df_heat)